

################################################################
##########               LOAD PACKAGES               ###########
################################################################

# Session setup: empty the global workspace, then run the garbage collector.
rm(list = ls())
gc()

# Put the U: package directory on the library search path and switch the
# working directory to the shared Joint folder.
# NOTE(review): the file paths visible in this script are absolute, so the
# working directory may not actually be needed — confirm before relying on it.
.libPaths("U:/R/Packages/4.0_top")
setwd("\\\\mcm-main/research/Zheng_10223/Joint")




# Complement of `%in%`: TRUE for each element of `x` that has no match in
# `table`, FALSE otherwise (NA is matched like any other value).
`%notin%` <- function(x, table) match(x, table, nomatch = 0L) == 0L


####################################################
# HARRISON START FROM HERE
library(data.table)
library(stringr)
# Read the landing-tract file into a data.table.
dftract <- fread("H:/Zheng_10223/Joint/Census/landing_tract1982_2005.csv")

# Recode the census code in `cens` into a census year stored in `censyear`
# (years assigned: 1986, 1991, 1996, 2006). Codes 81 and 86 both map to 1986;
# rows whose code is not listed here are not assigned a value.
# NOTE(review): code 1 is labelled 2006 here, while the join key built further
# down labels the same rows with year "2001" — confirm which one is intended.
dftract[cens == 81, censyear := 1986]
dftract[cens == 86, censyear := 1986]
dftract[cens == 91, censyear := 1991]
dftract[cens == 96, censyear := 1996]
dftract[cens == 1, censyear := 2006]

# Integer part of the tract number: as.integer() discards whatever follows the
# decimal point in CT_F.
dftract[, firsttractpart := as.integer(CT_F)]
########### Import enclave computation from census
enclaves <- fread("H:/Zheng_10223/Joint/Census/tract_vismin_all.csv")

# Turn both key parts into text: the tract id is left-padded with zeros to a
# width of 7 characters, and the year becomes a character string.
enclaves[, `:=`(
  uniquetract = str_pad(as.character(uniquetract), width = 7L, pad = "0"),
  year = as.character(year)
)]

# Join key: the year immediately followed by the padded tract id.
enclaves[, code := paste0(year, uniquetract)]




# Tract key: CMA code padded to 3 characters followed by the integer part of
# the tract number padded to 4 characters (both zero-padded on the left).
dftract[, CT_F := as.integer(CT_F)]
dftract[, `:=`(
  cma = str_pad(as.character(CMA_F), width = 3L, pad = "0"),
  ct = str_pad(as.character(CT_F), width = 4L, pad = "0")
)]
dftract[, uniquetract := paste0(cma, ct)]

# Row counts per census code before filtering (shown for inspection).
dftract[, .N, by = "cens"][order(cens)]

# Drop the rows with census code 81, then derive the year label as cens + 1900;
# a label of "1901" (census code 1) is rewritten to "2001".
dftract <- dftract[cens != 81]
dftract[, year := sub("^1901$", "2001", as.character(cens + 1900))]

# Row counts per year label after filtering (shown for inspection).
dftract[, .N, by = "year"][order(year)]

# Join key: the year label immediately followed by the tract key.
dftract[, code := paste0(year, uniquetract)]

# Diagnostic join on `code`: every dftract row is kept, and the enclave columns
# are NA where no enclave row carries the same code.
new <- enclaves[dftract, on = "code"]

# Row counts per enclave-side year (shown for inspection).
new[, .N, by = "year"][order(year)]

# Flag rows whose `ratio_black` is NA after the join and count them.
new[, missing := is.na(ratio_black)]
new[, .N, by = "missing"]


# Keep only what the output needs from the enclave table: the ten ratio
# columns, the join key (`code`) and the year label (`year`).
enclaves_clean <- enclaves[, c(
  "ratio_black", "ratio_sa", "ratio_ea", "ratio_sea", "ratio_pi",
  "ratio_wa", "ratio_la", "ratio_multi", "ratio_white", "ratio_aborig",
  "code", "year"
)]

# Numeric census-year columns on both tables (same columns the script has
# always created; only dftract's copy is written to the output).
dftract[, censyear := as.numeric(censyear)]
enclaves_clean[, censyear := as.numeric(year)]

# Merge based on code only.
# `code` starts with the year label, so it already identifies the census.
# `censyear` must not be a second merge key: dftract rows with census code 1
# carry censyear 2006 while their `code` is built from year "2001", whereas the
# enclave side derives its censyear from the same year that prefixes its code.
# With both keys those rows can never match and are written with NA ratios.
dftractmerge <- merge(
  dftract[, c("IMDB_ID", "censyear", "code")],
  enclaves_clean[, !"censyear", with = FALSE],
  by = "code",
  all.x = TRUE
)

# Keep the established output layout: code, censyear, IMDB_ID, then the
# enclave columns.
setcolorder(dftractmerge, c("code", "censyear", "IMDB_ID"))

fwrite(dftractmerge, "H:/Zheng_10223/Joint/dftractenclave.csv")

